{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "Building_Q_table.ipynb",
      "provenance": [],
      "include_colab_link": true
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/github/PacktPublishing/Modern-Computer-Vision-with-PyTorch/blob/master/Chapter16/Building_Q_table.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "Uf-9PVPXip-f"
      },
      "source": [
        "import numpy as np\n",
        "import gym\n",
        "import random\n",
        "env = gym.make('FrozenLake-v0', is_slippery=False)"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "xhL3Uf8AirCx"
      },
      "source": [
        "action_size=env.action_space.n\n",
        "state_size=env.observation_space.n\n",
        "qtable=np.zeros((state_size,action_size))"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "qeVgzBOkislL",
        "outputId": "d988f197-ee62-4863-ec4a-5afea6a3a499",
        "colab": {
          "base_uri": "https://localhost:8080/"
        }
      },
      "source": [
        "episode_rewards = []\n",
        "for i in range(10000):\n",
        "    state=env.reset()\n",
        "    total_rewards = 0\n",
        "    for step in range(50):\n",
        "        action=env.action_space.sample()\n",
        "        new_state,reward,done,info=env.step(action)\n",
        "        qtable[state,action]+=0.1*(reward+0.9*np.max(qtable[new_state,:])-qtable[state,action])\n",
        "        state=new_state\n",
        "        total_rewards+=reward\n",
        "    episode_rewards.append(total_rewards)\n",
        "print(qtable)"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "[[0.53143206 0.59047965 0.59048027 0.53143202]\n",
            " [0.5314317  0.         0.6560894  0.59048003]\n",
            " [0.5904795  0.72898869 0.5904782  0.65608855]\n",
            " [0.65608778 0.         0.59047782 0.59047755]\n",
            " [0.59047942 0.65608858 0.         0.53143178]\n",
            " [0.         0.         0.         0.        ]\n",
            " [0.         0.80998872 0.         0.65608495]\n",
            " [0.         0.         0.         0.        ]\n",
            " [0.65608789 0.         0.72898776 0.59047884]\n",
            " [0.65608665 0.80998743 0.80998606 0.        ]\n",
            " [0.72898105 0.89999117 0.         0.72898107]\n",
            " [0.         0.         0.         0.        ]\n",
            " [0.         0.         0.         0.        ]\n",
            " [0.         0.80994439 0.89999233 0.72897284]\n",
            " [0.80984792 0.8999768  0.99999765 0.80991361]\n",
            " [0.         0.         0.         0.        ]]\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "L9SOLfGnjgZb",
        "outputId": "862030db-9c6e-44fa-f844-cd98a33c7bf2",
        "colab": {
          "base_uri": "https://localhost:8080/"
        }
      },
      "source": [
        "episode_rewards = []\n",
        "epsilon=1\n",
        "max_epsilon=1\n",
        "min_epsilon=0.01\n",
        "decay_rate=0.005\n",
        "for episode in range(1000):\n",
        "    state=env.reset()\n",
        "    total_rewards = 0\n",
        "    for step in range(50):\n",
        "        exp_exp_tradeoff=random.uniform(0,1)\n",
        "        ## Exploitation:\n",
        "        if exp_exp_tradeoff>epsilon:\n",
        "            action=np.argmax(qtable[state,:])\n",
        "        else:\n",
        "            ## Exploration\n",
        "            action=env.action_space.sample()\n",
        "        new_state,reward,done,info=env.step(action)\n",
        "        qtable[state,action]+=0.9*(reward+0.9*np.max(qtable[new_state,:])-qtable[state,action])\n",
        "        state=new_state\n",
        "        total_rewards+=reward\n",
        "    episode_rewards.append(total_rewards)\n",
        "    epsilon=min_epsilon+(max_epsilon-min_epsilon)*np.exp(decay_rate*episode)\n",
        "print(qtable)"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "[[0.531441   0.59049    0.59049    0.531441  ]\n",
            " [0.531441   0.         0.6561     0.59049   ]\n",
            " [0.59049    0.729      0.59049    0.6561    ]\n",
            " [0.6561     0.         0.59049    0.59049   ]\n",
            " [0.59049    0.6561     0.         0.531441  ]\n",
            " [0.         0.         0.         0.        ]\n",
            " [0.         0.81       0.         0.6561    ]\n",
            " [0.         0.         0.         0.        ]\n",
            " [0.6561     0.         0.729      0.59049   ]\n",
            " [0.6561     0.81       0.81       0.        ]\n",
            " [0.729      0.9        0.         0.729     ]\n",
            " [0.         0.         0.         0.        ]\n",
            " [0.         0.         0.         0.        ]\n",
            " [0.         0.80999999 0.9        0.729     ]\n",
            " [0.81       0.9        1.         0.81      ]\n",
            " [0.         0.         0.         0.        ]]\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "2QtCWRXclCue",
        "outputId": "a9ff1f05-52a6-47a0-cb42-9ab007a73937",
        "colab": {
          "base_uri": "https://localhost:8080/"
        }
      },
      "source": [
        "env.reset()\n",
        "for episode in range(1):\n",
        "    state=env.reset()\n",
        "    step=0\n",
        "    done=False\n",
        "    print(\"-----------------------\")\n",
        "    print(\"Episode\",episode)\n",
        "    for step in range(50):\n",
        "        env.render()\n",
        "        action=np.argmax(qtable[state,:])\n",
        "        print(action)\n",
        "        new_state,reward,done,info=env.step(action) \n",
        "        if done:\n",
        "            print(\"Number of Steps\",step+1)\n",
        "            break\n",
        "        state=new_state\n",
        "env.close()"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "-----------------------\n",
            "Episode 0\n",
            "\n",
            "\u001b[41mS\u001b[0mFFF\n",
            "FHFH\n",
            "FFFH\n",
            "HFFG\n",
            "2\n",
            "  (Right)\n",
            "S\u001b[41mF\u001b[0mFF\n",
            "FHFH\n",
            "FFFH\n",
            "HFFG\n",
            "2\n",
            "  (Right)\n",
            "SF\u001b[41mF\u001b[0mF\n",
            "FHFH\n",
            "FFFH\n",
            "HFFG\n",
            "1\n",
            "  (Down)\n",
            "SFFF\n",
            "FH\u001b[41mF\u001b[0mH\n",
            "FFFH\n",
            "HFFG\n",
            "1\n",
            "  (Down)\n",
            "SFFF\n",
            "FHFH\n",
            "FF\u001b[41mF\u001b[0mH\n",
            "HFFG\n",
            "1\n",
            "  (Down)\n",
            "SFFF\n",
            "FHFH\n",
            "FFFH\n",
            "HF\u001b[41mF\u001b[0mG\n",
            "2\n",
            "Number of Steps 6\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "djaZt5qAlQMv"
      },
      "source": [
        ""
      ],
      "execution_count": null,
      "outputs": []
    }
  ]
}